Task 1

# Report the current working directory; the relative path "SPRUCE.csv" used
# below is resolved against it.
getwd()
## [1] "C:/Users/hoaho/OneDrive/Desktop/Classworks/Math 4753 Statistic/Lab3"

Task 2

# Load the spruce measurements and preview the first six rows.
spruce <- read.csv(file = "SPRUCE.csv")
head(spruce, n = 6)
##   BHDiameter Height
## 1       18.9   20.0
## 2       16.6   18.8
## 3       15.5   16.8
## 4       15.5   16.9
## 5       19.4   20.2
## 6       13.7   16.3

Task 3

Make a scatter plot of the data

# Scatter plot of Height against BHDiameter; both axes start at zero and
# extend 10% beyond the largest observed value.
with(spruce, {
  x_limits <- c(0, 1.1 * max(BHDiameter))
  y_limits <- c(0, 1.1 * max(Height))
  plot(
    x = BHDiameter, y = Height,
    main = "SPRUCE", xlab = "BHDiameter", ylab = "Height",
    pch = 21, bg = "Blue", cex = 1.2,
    xlim = x_limits, ylim = y_limits
  )
})

Does there appear to be a straight line relationship?

No, the relationship appears somewhat curved rather than straight.

Using trendscatter()

library(s20x)

# 2 x 2 grid of panels, filled row by row.
layout(matrix(1:4, nrow = 2, ncol = 2, byrow = TRUE))

# One trendscatter panel per smoother span f, titled with the span used.
for (span in c(0.5, 0.6, 0.7)) {
  with(
    spruce,
    trendscatter(Height ~ BHDiameter, f = span, main = paste0("f=", span))
  )
}

Linear model object and new scatterplot

# Fit a straight line and a loess smoother to Height as a function of
# BHDiameter.
spruce.lm <- with(spruce, lm(Height ~ BHDiameter))
lo <- with(spruce, loess(Height ~ BHDiameter))

# Redraw the scatter plot with both axes starting at zero.
with(spruce, {
  plot(
    x = BHDiameter, y = Height,
    main = "SPRUCE", xlab = "BHDiameter", ylab = "Height",
    pch = 21, bg = "Blue", cex = 1.2,
    xlim = c(0, 1.1 * max(BHDiameter)),
    ylim = c(0, 1.1 * max(Height))
  )
})

# Straight-line fit in red.
abline(spruce.lm, col = "red")

# Evaluate the loess curve on a fine grid spanning the observed diameters
# (step = 1/1000 of the range) and overlay it in black.
xl <- with(spruce, {
  diameter_range <- range(BHDiameter)
  seq(diameter_range[1], diameter_range[2], by = diff(diameter_range) / 1000)
})
lines(x = xl, y = predict(lo, newdata = xl), col = "black", lwd = 2)

Comment

The straight line looks a bit off; the smooth curve fits the data better.

Task 4

# Arrange the four Task 4 plots in a 2 x 2 grid, filled row by row.
layout(matrix(1:4, nrow = 2, ncol = 2, byrow = TRUE))

## Graph 1: scatter plot with the fitted straight line in red
with(spruce, {
  plot(
    x = BHDiameter, y = Height,
    main = "Fit line", xlab = "BHDiameter", ylab = "Height",
    pch = 21, bg = "Blue", cex = 1.2,
    xlim = c(0, 1.1 * max(BHDiameter)),
    ylim = c(0, 1.1 * max(Height))
  )
})
abline(spruce.lm, col = "red")
## Graph 2: residual deviations (each observed Height to its fitted value)
with(spruce, {
  plot(
    x = BHDiameter, y = Height,
    main = "With RSS", xlab = "BHDiameter", ylab = "Height",
    pch = 21, bg = "Blue", cex = 1.2,
    xlim = c(0, 1.1 * max(BHDiameter)),
    ylim = c(0, 1.1 * max(Height))
  )
})

# Fitted heights at the observed diameters.
yhat <- predict(
  spruce.lm,
  newdata = data.frame(BHDiameter = spruce$BHDiameter)
)

# Vertical segment from every observation to the fitted line.
segments(
  x0 = spruce$BHDiameter, y0 = spruce$Height,
  x1 = spruce$BHDiameter, y1 = yhat
)
abline(spruce.lm, col = "red")

# Residual sum of squares.
RSS <- sum((spruce$Height - yhat)^2)
RSS
## [1] 95.70281
## Graph 3: model deviations (mean Height to each fitted value)
with(spruce, {
  plot(
    x = BHDiameter, y = Height,
    main = "Mean of Height vs BHDiameter",
    xlab = "BHDiameter", ylab = "Height",
    pch = 21, bg = "Blue", cex = 1.2,
    xlim = c(0, 1.1 * max(BHDiameter)),
    ylim = c(0, 1.1 * max(Height))
  )
  # Horizontal reference line at the mean height.
  abline(h = mean(Height))
})
abline(spruce.lm, col = "red")

# Vertical segment from the mean height to each fitted value.
with(spruce, {
  mean_height <- mean(Height)
  segments(
    x0 = BHDiameter, y0 = mean_height,
    x1 = BHDiameter, y1 = yhat,
    col = "red"
  )
})

# Model sum of squares.
MSS <- sum((yhat - mean(spruce$Height))^2)
MSS
## [1] 183.2447
## Graph 4: total deviations (each observed Height to the mean Height)
with(spruce, {
  mean_height <- mean(Height)
  plot(
    x = BHDiameter, y = Height,
    main = "With total deviation line segments",
    xlab = "BHDiameter", ylab = "Height",
    pch = 21, bg = "Blue", cex = 1.2,
    xlim = c(0, 1.1 * max(BHDiameter)),
    ylim = c(0, 1.1 * max(Height))
  )
  # Horizontal reference line at the mean height.
  abline(h = mean_height)
  # Vertical segment from every observation to the mean height.
  segments(
    x0 = BHDiameter, y0 = Height,
    x1 = BHDiameter, y1 = mean_height,
    col = "green"
  )
})

TSS,RSS,MSS

# Total sum of squares: squared deviations of Height about its mean.
TSS <- sum((spruce$Height - mean(spruce$Height))^2)
TSS
## [1] 278.9475
MSS
## [1] 183.2447
RSS
## [1] 95.70281

MSS/TSS

# Ratio of model to total sum of squares.
MSS / TSS
## [1] 0.6569146

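This number is the coefficient of determination (R²). It is about 0.66, meaning that roughly 66% of the variability in Height is explained by the straight-line model in BHDiameter; the remaining 34% is left unexplained. Therefore, this linear model is only a moderate fit and is not very good.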

Does TSS = MSS + RSS?

# Sum of the model and residual sums of squares, for comparison with TSS.
MSS + RSS
## [1] 278.9475

Yes

Task 5

Summarize spruce.lm

# Print the residual summary, coefficient table, R-squared and F-statistic
# for the fitted straight-line model.
summary(spruce.lm)
## 
## Call:
## lm(formula = Height ~ BHDiameter)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9394 -0.9763  0.2829  0.9950  2.6644 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.14684    1.12131   8.157 1.63e-09 ***
## BHDiameter   0.48147    0.05967   8.069 2.09e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.678 on 34 degrees of freedom
## Multiple R-squared:  0.6569, Adjusted R-squared:  0.6468 
## F-statistic:  65.1 on 1 and 34 DF,  p-value: 2.089e-09
# Extract just the estimated intercept and slope.
coef(spruce.lm)
## (Intercept)  BHDiameter 
##   9.1468390   0.4814743

What is the value of the slope?

0.4814743

What is the value of the intercept?

9.1468390

Write down the equation of the fitted line.

Height = 9.1468390 + 0.4814743*BHDiameter

Predict the Height of spruce when the Diameter is 15, 18 and 20cm

  # Predicted heights from the fitted line at diameters of 15, 18 and 20 cm.
  predict(spruce.lm, newdata = data.frame(BHDiameter = c(15, 18, 20)))
##        1        2        3 
## 16.36895 17.81338 18.77632

Task 6

library(ggplot2)

# Height against BHDiameter: the points and the line joining them are coloured
# by BHDiameter, with the fitted linear regression line drawn on top.
#
# The colour mapping is set on the point and line layers rather than globally.
# geom_smooth() cannot use a continuous colour aesthetic, so a global mapping
# makes ggplot2 (>= 3.4) warn that the aesthetic was dropped during the
# statistical transformation. The smoothing formula is given explicitly so the
# model being drawn is visible in the code.
g <- ggplot(spruce, aes(x = BHDiameter, y = Height)) +
  geom_point(aes(colour = BHDiameter)) +
  geom_line(aes(colour = BHDiameter)) +
  geom_smooth(method = "lm", formula = y ~ x)
g + ggtitle("Height Vs BHDiameter")

Task 7

Linear Regression Line Points Only Joined Points